KIRC Analysis

In [1]:
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/Pathway_Merge/src
In [2]:
import os as os
import pickle as pickle
import subprocess
import pandas as pd

from Processing.Tests import run_feature_matrix, SurvivalTest
In [3]:
pd.set_option('precision',3)
In [4]:
# Drug family -> list of lower-case free-text drug names assigned to that family.
# NOTE(review): several entries are misspelled ('nexavaar', 'gemictiabine',
# 'thearpy', ...); presumably they mirror the raw clinical drug-name strings
# and must stay as-is to match -- verify before "correcting" them.
drugs = {'TKI': ['perifosine', 'sunitinib', 'sorafenib', 'pazopanib', 'sutent', 'tarceva','nexavaar',
         'sutent (sunitinib)', 'gefitinib', 'nexavar', 'bay-439006', 
         'azd', 'iressa', 'sorafenib - nexavar', 'axitinib', 'sunitinib (sutent)', 'tipifarnib',
         'tyrosine kinase inhibitor', 'votrient', 'zd6474'],
         'VEGF Ab': ['bevacizumab', 'avastin'],
         'mTORi': ['temsirolimus','everolimus','rad001','torisel','afinitor'],
         # The comma after 'proleukin' matters: without it Python concatenates
         # the adjacent literals into the single key 'proleukinroferon-a'.
         'IL2/IF': ['interferon', 'il-2','il-2 (high dose)','proleukin (il-2)',
          'interleukin-2','interferon-alpha','interferon alpha',
          'intron a', 'alpha interferon', 'proleukin',
          'roferon-a','il-2 thearpy (interleukin)','high dose interleukin-2',
          'ifn-alpha (intron)', 'interleukin 2-high dose', 'inf'],
          'Chemo': ['bortezomib', 'gemictiabine', '5-fluorouracil','capecitabine','gemzar','thalidomide','nab-rapamycin',
                    'capecitabin', 'gemcitabine','xeloda'],
          'Vaccine': ['oncophage', 'oncophage vaccine']}
# Inverted lookup: drug name -> family. The loop variable is deliberately not
# called `drugs` so the module-level dict is never shadowed or rebound.
drug_map = {drug: family for family, members in drugs.items() for drug in members}
In [5]:
# Directory holding one sub-directory per analysis run.
result_path = '/scratch/TCGA/Firehose__2012_01_16/ucsd_analyses'
# The run to load is the second entry (index 1) of the sorted directory listing.
run_dir = sorted(os.listdir(result_path))[1]
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at RunObject.p files from a trusted source.
# The context manager closes the file handle once the object is loaded.
with open(os.path.join(result_path, run_dir, 'RunObject.p'), 'rb') as run_file:
    run = pickle.load(run_file)
In [6]:
cancer = run.load_cancer('KIRC')
clinical = cancer.load_clinical()
global_vars = cancer.load_global_vars()
In [7]:
clinical.clinical.tumor_stage.value_counts()
Out[7]:
stge i      241
stge iii    121
stge iv      76
stge ii      49
In [8]:
mut = cancer.load_data('MAF')
mut.uncompress()
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
rppa  = cancer.load_data('RPPA')

Section 1: Stratification Variables

Clinical Variables

In [9]:
stage = clinical.clinical.tumor_stage
stage = stage.map(lambda s: s.replace('stge', 'stage'))
stage.value_counts().sort_index().plot(kind='bar')
Out[9]:
<matplotlib.axes.AxesSubplot at 0x738a8d0>
In [10]:
fig, axs = subplots(1,2, figsize=(12,4))
v = clinical.clinical.tumor_grade
v.value_counts().sort_index().plot(kind='bar', title=v.name, ax=axs[0]);
v = clinical.clinical.tumor_grade.dropna().map(lambda s: s[:2])
v.value_counts().sort_index().plot(kind='bar', title=v.name, ax=axs[1]);
axs[0].set_ylabel('# of Patients')
Out[10]:
<matplotlib.text.Text at 0x71ae710>
In [11]:
age = clinical.clinical.age
by_stage = pd.DataFrame({s: age[stage[stage==s].index].describe() for s in stage.unique()})
all_stage = pd.Series(clinical.clinical.age.describe(), name='All')
by_stage.join(all_stage).astype(object)
Out[11]:
stage i stage ii stage iii stage iv All
count 241 49 120 76 486
mean 60.3 60.7 63.3 61.2 61.2
std 12.8 12.5 12.1 9.9 12.2
min 26.6 39.3 32.1 33.5 26.6
25% 51.4 49.5 56 55.7 52.2
50% 59.8 59.7 63.9 61.3 61.2
75% 70.2 69.8 72.9 66.2 70.4
max 90.1 86.5 88.7 84.2 90.1
In [12]:
age.hist()
ylabel('# of Patients')
xlabel('Age')
Out[12]:
<matplotlib.text.Text at 0x70837d0>
In [13]:
clinical.clinical.lymphnode_stage.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')
Out[13]:
<matplotlib.text.Text at 0x739d950>
In [14]:
clinical.clinical.gender.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Gender')
Out[14]:
<matplotlib.text.Text at 0x72dec90>
In [15]:
pd.crosstab(stage, clinical.clinical.calcium_level)
Out[15]:
calcium_level elevated low normal
tumor_stage
stage i 1 109 50
stage ii 0 16 15
stage iii 3 46 35
stage iv 6 22 30
In [16]:
s = pd.crosstab(stage, clinical.clinical.calcium_level).ix['stage iv']
s[['low','normal','elevated']].plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')
Out[16]:
<matplotlib.text.Text at 0x749c850>
In [17]:
pd.crosstab(stage, clinical.clinical.eastern_cancer_oncology_group.dropna())
Out[17]:
eastern_cancer_oncology_group 0 1 2
tumor_stage
stage i 18 2 2
stage ii 3 2 0
stage iii 13 3 0
stage iv 15 9 3
In [18]:
clinical.clinical.hemoglobin.value_counts().plot(kind='bar')
ylabel('# of Patients')
xlabel('Hemoglobin Level')
Out[18]:
<matplotlib.text.Text at 0x973e650>
In [19]:
import Reports.Figures as F
reload(F)
from Reports.Figures import *
from Processing.Tests import *
from Reports.NotebookTools import *
In [20]:
def draw_survival_curves(feature, surv, assignment=None, filename='tmp.png', show=False, 
                               title=True, labels=['No Mutation', 'Mutation'], 
                               colors=['blue','red'], ann=None, show_legend=True, q=.25):
    """
    Draw survival curves for `feature` into a PNG file through R graphics.

    One panel is drawn for every distinct non-null value of `assignment`
    (restricted to the index of `feature`); when `assignment` is None a
    single panel titled with `feature.name` is drawn.

    Parameters
    ----------
    feature : pandas Series
        Values that define the curves.  When `get_vec_type` reports 'real'
        and there are more than 5 distinct values, each panel's values are
        passed through `to_quants(..., q=q)` and `colors` / `labels` are
        replaced by built-in three-color defaults.
    surv : pandas object
        Must support `surv.ix[:, 'days']` and `surv.ix[:, 'event']`; it is
        also handed to `get_cox_ph_ms` for the model fit.
    assignment : pandas Series or None
        Grouping variable; one panel per value.
    filename : str
        Path of the PNG written by R.
    show : bool
        When true, return `Show(filename)`; otherwise return None.
    title : unused
        Accepted for interface compatibility; the body never reads it.
    labels, colors : list
        Legend labels and line colors.  The defaults are shared list objects
        but are only rebound locally, never mutated.
    ann : None, 'p' or other
        'p' writes the log-rank result taken from the printed `survdiff`
        output on each panel; any other non-None value is written verbatim.
    show_legend : True, 'out' or other
        True places the legend inside the last panel, 'out' places it to the
        right of the plotting region, anything else draws no legend.
    q : float
        Quantile passed to `to_quants` for real-valued features.
    """
    if assignment is None:
        num_panels = 1
    else:
        num_panels = len(assignment.unique())

    r.png(filename=filename, width=200*(num_panels+1), height=300, res=75)
    # An R graphics device is open from here on.  The finally clause makes
    # sure it is closed even when fitting or plotting raises, so a failed call
    # does not leave a dangling device behind.
    try:
        fmla = robjects.Formula('Surv(days, event) ~ feature')
        r.par(mfrow=r.c(1, num_panels))
        r.par(mar=r.c(4,5,4,1))
        r.par(xpd=True)

        if (get_vec_type(feature) == 'real') and (len(feature.unique()) > 5):
            colors = ['blue','orange','red']
            if q == .5:
                labels = ['Bottom 50%', 'Top 50%']
            else:
                labels = ['Bottom {}%'.format(int(q*100)), 'Normal', 'Top {}%'.format(int(q*100))]

        ls = r.c(*colors)

        def plot_me(sub_f, label):
            """Draw a single panel for the values in `sub_f`, titled `label`."""
            if (get_vec_type(sub_f) == 'real') and (len(sub_f.unique()) > 5):
                sub_f = to_quants(sub_f, q=q)
            m = get_cox_ph_ms(surv, sub_f, return_val='model', formula=fmla)
            # The data frame the model was fitted on is pulled back out of the
            # stored R call so survfit/survdiff see exactly the same rows.
            r_data = m.rx2('call')[2]

            r.plot(survival.survfit(fmla, r_data), lty=1, col=ls, lwd=4, cex=1.25,
                   xlab='Years to Event', ylab='Survival')
            r.title(label, cex=3.)
            if ann == 'p':
                # The log-rank test is only run when its result is printed.
                s = survival.survdiff(fmla, r_data)
                p = str(s).split('\n\n')[-1].strip().split(', ')[-1]
                r.text(0, labels='logrank ' + p, pos=4)
            elif ann is not None:
                r.text(0, labels=ann, pos=4)

        if assignment is None:
            assignment = np.ones_like(feature)
            name = lambda v: feature.name
        else:
            name = lambda v: str(assignment.name) + ' = ' + str(v)
        if show_legend == 'out':
            # Widen the right margin so the legend fits outside the plot.
            r.par(xpd=True, mar=r.c(4,5,5,8))

        plotted = False
        last_group = None
        for value in sorted(assignment.ix[feature.index].dropna().unique()):
            plot_me(feature.ix[assignment[assignment==value].index], name(value))
            plotted, last_group = True, value

        # `== True` (not `is True`) is kept so values equal to True, such as
        # 1, keep selecting the in-panel legend exactly as before.
        if show_legend == True:
            # Without any plotted group there is no panel to put a legend on
            # (this case used to fail with a NameError on the loop variable).
            if plotted:
                # Placement depends on the mean of the 'event' column in the
                # last plotted group: below .4 the legend goes near the left
                # edge at y=.45, otherwise further right at y=.9.
                mean_s = surv.ix[:,'event'].ix[assignment[assignment==last_group].index].mean()
                if mean_s < .4:
                    r.legend(surv.ix[:,'days'].max() * .05 / 365., .45, labels, 
                             lty=1, col=ls, lwd=3, bty='o')
                else:
                    r.legend(surv.ix[:,'days'].max() * .4 / 365, .9, labels, 
                             lty=1, col=ls, lwd=3, bty='o')
        elif show_legend == 'out':
            r.legend(surv.ix[:,'days'].max() * 1.1  / 365, .9, labels, 
                         lty=1, col=ls, lwd=3, bty='o')
    finally:
        r('dev.off()')
    if show:
        return Show(filename)
In [21]:
stage = clinical.clinical.tumor_stage.map({'stge i': 'Stage I', 'stge ii': 'Stage II', 'stge iii': 'Stage III', 'stge iv': 'Stage IV'})
surv = clinical.survival.survival_5y
In [22]:
f = stage
f.name = 'Overall Survival'
t = get_surv_fit(surv, f)
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'), 
                           ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                           ('95% Confidence Int.', 'Upper')])
f = draw_survival_curves(f, surv, colors=['green','blue','orange','red'], labels=list(f.unique()), show=True)
fig_tab(f, t)
Out[22]:
95% Confidence Int.
# Patients # Deaths Median OS Lower Upper
Stage I 241 29 NaN NaN NaN
Stage II 49 6 NaN NaN NaN
Stage III 121 45 4.48 3.21 NaN
Stage IV 76 54 1.78 1.26 3.28

Drugs administered

In [17]:
drugs_types = drugs.keys()
drug_categories = clinical.drugs.drugname.map(drug_map)
drug_given = pd.DataFrame({d: ((drug_categories == d).groupby(level=0).sum() > 0) for d in drugs_types})
In [18]:
fig, axs = subplots(1,2, figsize=(12,4))
crosstab(stage, drug_given.sum(1) > 0)[True].plot(kind='bar', ax=axs[0])
axs[0].set_ylabel('# of Patients')
axs[0].set_title('Patients Receiving Medication By Stage')

drug_given.sum().plot(kind='bar', ax=axs[1]);
axs[1].set_title('Drug Categories');
In [609]:
drug_given.sum()
Out[609]:
Chemo      11
IL2/IF     29
TKI        49
VEGF Ab    11
Vaccine     7
mTORi      12
In [47]:
s = drug_given.ix[stage.index[stage == 'Stage IV']].sum()
s.plot(kind='bar')
ylabel('# of Patients')
Out[47]:
<matplotlib.text.Text at 0x91c0590>
In [48]:
n = drug_given.ix[stage.index[stage == 'Stage IV']].dropna().sum(1).value_counts()[:5]
n.name = 'Number of Medications'
n.plot(kind='bar')
ylabel('# of Patients')
Out[48]:
<matplotlib.text.Text at 0x92f4710>
In [75]:
gc = drug_given.astype(int).astype(str).apply(lambda s: ''.join(s), axis=1)
gc.name = 'drugs'
In [76]:
one_drug = drug_given.ix[stage.index[stage == 'Stage IV']].dropna().sum(1) == 1
In [77]:
vc = drug_given.ix[one_drug[one_drug].index].sum().order()
vc.plot(kind='bar')
ylabel('# of Patients')
Out[77]:
<matplotlib.text.Text at 0x1214f250>
In [78]:
surv = clinical.survival.survival_5y
In [275]:
p = drug_given.ix[one_drug[one_drug].index]['IL2/IF']
clin = clinical.clinical.ix[p[p].index][['tumor_grade','calcium_level','histo_grade','neo_status','hemoglobin','gender','tissuesourcesite','age']]
tf = clinical.timeline.ix[p[p].index].sort(columns='daystodeath').ix[:,:2]
tf.join(clin)
Out[275]:
daystodeath daystolastfollowup tumor_grade calcium_level histo_grade neo_status hemoglobin gender tissuesourcesite age
TCGA-B0-4841 203 139 t2 low g3 with tumor low male b0 63.28
TCGA-BP-4335 460 460 t3a low g3 with tumor low female bp 65.41
TCGA-CJ-4923 572 572 t3a NaN g4 with tumor normal female cj 63.98
TCGA-B8-4143 709 709 t3a elevated g3 with tumor low female b8 66.12
TCGA-B0-4846 1199 1199 t3a normal g2 with tumor normal male b0 52.44
TCGA-CZ-4857 1432 1432 t3a low g3 with tumor low male cz 56.76
TCGA-CW-5580 1964 1964 t3a NaN g3 NaN NaN female cw 73.14
TCGA-BP-5201 NaN 951 t3b low g4 with tumor low male bp 63.36
TCGA-CJ-4871 NaN 2422 t3a normal g4 tumor free low male cj 63.55
TCGA-CJ-5682 NaN 1883 t3a normal g4 with tumor normal male cj 60.94
TCGA-CW-5591 NaN 2270 t3a NaN g2 with tumor NaN male cw 56.75
TCGA-CZ-5460 NaN 1430 t3b low g2 with tumor low male cz 55.87
In [282]:
c = mut.df.ix[:,tf.index].dropna(1).columns
In [283]:
tf.join(clin).ix[c]
Out[283]:
daystodeath daystolastfollowup tumor_grade calcium_level histo_grade neo_status hemoglobin gender tissuesourcesite age
TCGA-CJ-4923 572 572 t3a NaN g4 with tumor normal female cj 63.98
TCGA-B8-4143 709 709 t3a elevated g3 with tumor low female b8 66.12
TCGA-CW-5580 1964 1964 t3a NaN g3 NaN NaN female cw 73.14
TCGA-BP-5201 NaN 951 t3b low g4 with tumor low male bp 63.36
TCGA-CJ-5682 NaN 1883 t3a normal g4 with tumor normal male cj 60.94
TCGA-CW-5591 NaN 2270 t3a NaN g2 with tumor NaN male cw 56.75
TCGA-CZ-5460 NaN 1430 t3b low g2 with tumor low male cz 55.87
In [291]:
p1 = array(['TCGA-CJ-4923','TCGA-B8-4143'])
p2 = array(['TCGA-CW-5580','TCGA-CJ-5682','TCGA-CW-5591','TCGA-CZ-5460'])
In [288]:
pd.Series(np.ones_like(p1), p1)
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-288-5bb02331bfab> in <module>()
----> 1 pd.Series(np.ones_like(p1), p1)

NotImplementedError: Not implemented for this type
In [295]:
pat = pd.concat([pd.Series(np.ones(len(p1)), p1), 2.*pd.Series(np.ones(len(p2)), p2)])
pat.name = 'IL2'
draw_survival_curves(pat, surv, show=True)
Out[295]:
In [296]:
pat = pat.map({1: 'bad', 2: 'good'})
In [305]:
p = mut.df.ix[:,pat.index].dropna(axis=1)
In [306]:
good_counts = p.ix[:, pat[pat=='good'].index].dropna(axis=1).sum(1)
good_counts = good_counts[good_counts > 0].order()
In [307]:
bad_counts = p.ix[:, pat[pat=='bad'].index].dropna(axis=1).sum(1)
bad_counts = bad_counts[bad_counts > 0].order()
In [308]:
gb = pd.concat([good_counts, bad_counts], axis=1, keys=['good','bad']).fillna(0)
In [322]:
gb.bad.order().dropna().tail(2)
Out[322]:
ACSBG2    2
BAP1      2
Name: bad
In [357]:
gb.ix[gb.bad.order().dropna().tail(2).index]
Out[357]:
good bad
ACSBG2 0 2
BAP1 1 2
In [263]:
gb[(gb.good - gb.bad).abs() > 2]
Out[263]:
good bad
BIOCARTA_AGR_PATHWAY 3 0
KEGG_TGF_BETA_SIGNALING_PATHWAY 3 0
REACTOME_MEMBRANE_TRAFFICKING 3 0
SIG_PIP3_SIGNALING_IN_CARDIAC_MYOCTES 3 0
ST_INTEGRIN_SIGNALING_PATHWAY 3 0
In [242]:
# `metastatic` is used by this and later cells but is not defined anywhere
# earlier in the notebook, so a fresh kernel fails here.
# NOTE(review): presumably it is the index of Stage IV patients (the 37
# patients counted in this cell's output equal the Stage IV count among
# mutation-profiled samples) -- confirm against the original session.
metastatic = stage[stage == 'Stage IV'].index
mut.df.ix['SETD2'].ix[metastatic].value_counts()
Out[242]:
0    28
1     9
In [244]:
pd.crosstab(pd.Series(ones_like(mut.df.columns), mut.df.columns), stage)
Out[244]:
feature Stage I Stage II Stage III Stage IV
row_0
1 140 28 73 37
In [346]:
f = pd.concat([1.*(mut.df.ix['SETD2']), 2.*(mut.df.ix['BAP1']>0)], axis=1).sum(1)
f = f[(f > 0) * (f < 3)]
f.name = 'SETD2 vs. BAP1'
In [356]:
draw_survival_curves(mut.df.ix['ACSBG2'].ix[metastatic], surv, show=True)
Out[356]:
In [352]:
# Survival curves for the SETD2-vs-BAP1 feature restricted to `metastatic`.
# (A stray leading '}' previously made this cell a syntax error.)
draw_survival_curves(f.ix[metastatic], surv, show=True, labels=['SETD2','BAP1'])
Out[352]:
In [219]:
(gb.bad - gb.good).order().dropna()
Out[219]:
MUC4      -1
MYH1      -1
SETD2     -1
ABCA13     0
ANKS1B     0
BARD1      0
CDON       0
GPR98      0
PBRM1      0
PKHD1L1    0
POMZP3     0
In [264]:
s = pd.Series({m: anova(pat, vec) for m,vec in rppa.features.iterrows()})
s = s.order()
In [274]:
f = rppa.features.ix[s.index[0], metastatic]
f.name = str(f.name)
draw_survival_curves(f, surv, show=True, q=.5)
Out[274]:
In [265]:
violin_plot_pandas(pat, rppa.features.ix[s.index[0]]);
In [172]:
s
Out[172]:
KEGG_CELL_CYCLE                                                 0.02
BIOCARTA_PPARA_PATHWAY                                          0.03
KEGG_LYSOSOME                                                   0.03
KEGG_NON_HOMOLOGOUS_END_JOINING                                 0.03
KEGG_OOCYTE_MEIOSIS                                             0.03
KEGG_PROGESTERONE_MEDIATED_OOCYTE_MATURATION                    0.03
REACTOME_APCDC20_MEDIATED_DEGRADATION_OF_CYCLIN_B               0.03
REACTOME_APOPTOTIC_EXECUTION_PHASE                              0.03
REACTOME_AUTODEGRADATION_OF_CDH1_BY_CDH1_APC                    0.03
REACTOME_CDC20_PHOSPHO_APC_MEDIATED_DEGRADATION_OF_CYCLIN_A     0.03
REACTOME_CONVERSION_FROM_APC_CDC20_TO_APC_CDH1_IN_LATE_ANAPHASE    0.03
REACTOME_GENES_INVOLVED_IN_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS    0.03
REACTOME_GLUCOSE_TRANSPORT                                      0.03
REACTOME_HOST_INTERACTIONS_OF_HIV_FACTORS                       0.03
REACTOME_INACTIVATION_OF_APC_VIA_DIRECT_INHIBITION_OF_THE_APCOMPLEX    0.03
...
SIG_IL4RECEPTOR_IN_B_LYPHOCYTES                    NaN
SIG_INSULIN_RECEPTOR_PATHWAY_IN_CARDIAC_MYOCYTES   NaN
SPEN                                               NaN
ST_ADRENERGIC                                      NaN
ST_B_CELL_ANTIGEN_RECEPTOR                         NaN
ST_DIFFERENTIATION_PATHWAY_IN_PC12_CELLS           NaN
ST_ERK1_ERK2_MAPK_PATHWAY                          NaN
ST_GAQ_PATHWAY                                     NaN
ST_GRANULE_CELL_SURVIVAL_PATHWAY                   NaN
ST_INTERLEUKIN_4_PATHWAY                           NaN
ST_JAK_STAT_PATHWAY                                NaN
ST_PHOSPHOINOSITIDE_3_KINASE_PATHWAY               NaN
ST_STAT3_PATHWAY                                   NaN
ST_TYPE_I_INTERFERON_PATHWAY                       NaN
ST_WNT_CA2_CYCLIC_GMP_PATHWAY                      NaN
Length: 639
In [85]:
split_cols = lambda s: ','.join([d for i,d in enumerate(drug_given.columns) if s[i] == '1'])
t = get_surv_fit(surv, gc[one_drug[one_drug].index])
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'), 
                           ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                           ('95% Confidence Int.', 'Upper')])
t.index = map(split_cols, t.index)
t
Out[85]:
95% Confidence Int.
# Patients # Deaths Median OS Lower Upper
mTORi 1 1 0.91 NaN NaN
Vaccine 3 3 1.78 1.57 NaN
TKI 15 9 2.95 0.94 NaN
IL2/IF 12 6 3.92 1.94 NaN
Chemo 1 1 0.25 NaN NaN
In [84]:
draw_survival_curves(gc[one_drug[one_drug].index], surv, colors=['red','orange','green','purple','blue','yellow'], 
                     labels=[c for c in drug_given.columns if vc[c] > 0][::-1], show=True, show_legend='out')
Out[84]:
In [98]:
gc.value_counts()
Out[98]:
001000    31
010000    16
001001     5
000010     3
011000     2
110100     2
101100     2
000001     2
000000     2
110000     2
001010     1
111010     1
011110     1
010001     1
001101     1
001100     1
111101     1
111100     1
111110     1
000101     1
011001     1
100000     1
In [151]:
drug_lists = gc.apply(lambda s: ','.join([d for i,d in enumerate(drug_given.columns) if s[i] == '1']))
In [58]:
pd.crosstab(drug_lists, stage).ix[1:].T.plot(kind='bar')
Out[58]:
<matplotlib.axes.AxesSubplot at 0x95dc210>
In [194]:
mut = cancer.load_data('MAF')
mut.uncompress()
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
rppa  = cancer.load_data('RPPA')
In [47]:
vhl_mut = mut.df.ix['VHL']
vhl_mut.name = 'VHL_mut'
vhl_meth = meth.df.ix['VHL']
vhl_meth.name = 'VHL_meth'
vhl_rna = rna.df.ix['VHL']
vhl_rna.name = 'VHL_rna'
In [20]:
cdk_del = cn.df.ix['Deletion'].ix['9p21.3'].ix[0]
cdk_del.name = 'del_band'
In [21]:
draw_survival_curves_split(cdk_del, clinical.clinical.tumor_stage, surv, ann='p', show=True)
Out[21]:
In [22]:
draw_survival_curves_split(vhl_mut, stage, surv, ann='p', q=.25, show=True)
Out[22]:
In [272]:
draw_survival_curves(f, surv, ann='p', show=True)
Out[272]:

Mutations in Stage 4

In [26]:
non_met = stage[stage.isin(['Stage I','Stage II','Stage III'])].index
In [32]:
survival_test = 'survival_5y'
covariates = ['age', ('mutation', 'rate_non')]
cov_df = global_vars.join(clinical.clinical, how='outer')
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [33]:
df = mut.features.ix[:,non_met]
df = df.dropna(axis=1)
counts = Series(df.sum(1), name='counts')
df  = df[counts > 6]
In [34]:
#del get_cox_ph_ms.null_model
mut_met = run_feature_matrix(df, test)
mut_met = mut_met.join(counts).sort(columns=[('Full','LR')])
In [39]:
mut_met.head(10).astype(object)
Out[39]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
REACTOME_MITOTIC_PROMETAPHASE 4.28e-05 0.0274 Surv(days, event) ~ feature + mutation__rate_n... 0.468 0.0569 0.774 83
BIOCARTA_CASPASE_PATHWAY 0.000445 0.142 Surv(days, event) ~ feature + mutation__rate_n... 0.318 0.258 0.971 13
BIOCARTA_FAS_PATHWAY 0.00128 0.182 Surv(days, event) ~ feature + mutation__rate_n... 1.72 0.171 0.971 29
KEGG_BUTANOATE_METABOLISM 0.00161 0.182 Surv(days, event) ~ feature + mutation__rate_n... 0.385 0.19 0.971 24
REACTOME_MITOTIC_M_M_G1_PHASES 0.00194 0.182 Surv(days, event) ~ feature + mutation__rate_n... 0.583 0.123 0.956 99
REACTOME_SEMA3A_PLEXIN_REPULSION_SIGNALING_BY_INHIBITING_INTEGRIN_ADHESION 0.00202 0.182 Surv(days, event) ~ feature * mutation__rate_n... 0.254 0.176 0.971 22
MUC16 0.00254 0.182 Surv(days, event) ~ feature + mutation__rate_n... 3.81 0.000313 0.2 28
REACTOME_ASSOCIATION_OF_TRIC_CCT_WITH_TARGET_PROTEINS_DURING_BIOSYNTHESIS 0.00276 0.182 Surv(days, event) ~ feature + mutation__rate_n... 0.428 0.244 0.971 21
REACTOME_SIGNALING_BY_NOTCH 0.00282 0.182 Surv(days, event) ~ feature + mutation__rate_n... 0.486 0.477 0.976 14
REACTOME_CYTOSOLIC_TRNA_AMINOACYLATION 0.00309 0.182 Surv(days, event) ~ feature + mutation__rate_n... 0.3 0.234 0.971 17
In [40]:
import Reports.Figures as F
reload(F)
from Reports.Figures import *
In [41]:
from Reports.NotebookTools import *
In [49]:
met = stage == 'Stage IV'
met = met.map({True: 'Yes', False: 'No'})
met.name = 'Metastatic'
In [50]:
def draw_me(f):
    """
    Build the stacked survival figure for mutation feature `f`.

    Draws three survival plots for `mut.features.ix[f]`: split by the global
    `met` variable, over all patients, and restricted to the columns of the
    global `df`.  A pathway plot of the member genes `run.gene_sets[f]` is
    then attempted; if that fails for any ordinary reason the restricted
    survival plot is shown on its own instead.

    Returns the result of `stack(...)` over the produced images.
    """
    split_by_stage = draw_survival_curves(mut.features.ix[f], surv, met , ann='p', show=True)
    all_surv = draw_survival_curves(mut.features.ix[f], surv, ann='p', show=True)
    # Drawn last on purpose: every call writes to 'tmp.png', and the
    # side-by-side image below reads that file, so it must hold this plot.
    curves = draw_survival_curves(mut.features.ix[f, df.columns], surv, ann='p', filename='tmp.png', show=True)
    try:
        pathway_plot(mut.df.ix[run.gene_sets[f], df.columns], plt.gca())
        plt.tight_layout()
        plt.savefig('tmp1.png', dpi=75, bbox_inches=0, pad_inches=0)
        plt.close('all')
        return stack([side_by_side(['tmp.png', 'tmp1.png']), split_by_stage, all_surv])
    except Exception:
        # Best-effort fallback (presumably `f` is not a key of run.gene_sets
        # when it is a single gene -- confirm).  Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit through.
        plt.close('all')  # do not leave a half-drawn figure open
        return stack([curves, split_by_stage, all_surv])
    
s = stack([draw_me(f) for f in mut_met.index[:15]])
s
Out[50]:

In [51]:
df = cn.features.ix[:,non_met]
df = df.dropna(axis=1)
counts = Series((df != 0).sum(1), name='counts')
df  = df[counts > 5]
In [53]:
survival_test = 'survival_5y'
covariates =  ['age', ('cna', 'chrom_instability')]
cov_df = global_vars.join(clinical.clinical, how='outer')
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [54]:
cna_met = run_feature_matrix(df, test)
cna_met = cna_met.join(counts).sort(columns=[('Full','LR_q')])
In [59]:
cna_met.head(20)
Out[59]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
Deletion 9p21.3 (CDKN2A, CDKN2B) 5.52e-04 0.03 Surv(days, event) ~ age + feature + age:feature\n 0.46 9.45e-05 0.01 118
Lesion 9.19e-04 0.03 Surv(days, event) ~ age + feature + age:feature\n 0.43 2.36e-04 0.01 109
9p23 Lesion 1.38e-03 0.03 Surv(days, event) ~ age + feature + cna__chrom... 0.45 3.99e-04 0.01 106
Amplification 5q35.2 (ARL10, BOD1, C5orf25, C5orf47, CDHR2, CLTB, CPEB4, FAF2, FGFR4, HIGD2A, HRH2, KIAA1191, NOP16, NSD1, RNF44, SFXN1, THOC3, TSPAN17, UIMC1, ZNF346) 2.34e-03 0.03 Surv(days, event) ~ age + feature + age:feature\n 0.64 7.53e-03 0.12 256
5q35.3 (AGXT2L2, B4GALT7, C5orf45, CANX, CLK4, CNOT6, COL23A1, DDX41, FAM193B, GNB2L1, GRK6, GRM6, HNRNPAB, HNRNPH1, LMAN2, LOC202181, LOC729678, LTC4S, MAML1, MAPK9, MGAT1, MGAT4B, NHP2, PRELID1, RAB24, RGS14, RMND5B, RNF130, RUFY1, SLC34A1, SQSTM1, TBC1D9B, TMED9, TRIM41, TRIM52, ZFP2, ZFP62, ZNF354A, ZNF354B, ZNF454, ZNF879) 2.52e-03 0.03 Surv(days, event) ~ age + feature + age:feature\n 0.65 9.00e-03 0.12 255
5q34 (CCNG1, MAT2B, NUDCD2, PANK3, RARS, WWC1) 3.71e-03 0.04 Surv(days, event) ~ age + feature + age:feature\n 0.66 1.69e-02 0.16 252
5q33.3 (ADAM19, C5orf54, CLINT1, CYFIP2, FABP6, HAVCR1, ITK, LSM11, MED7, PWWP2A, RNF145, SLU7, THG1L, TIMD4, TTC1, UBLCP1) 4.91e-03 0.04 Surv(days, event) ~ age + feature + age:feature\n 0.67 2.16e-02 0.18 251
5q35.1 (ATP6V0E1, BNIP1, C5orf41, CCDC99, DUSP1, ERGIC1, FAM196B, FBXW11, LCP2, LOC100268168, LOC257358, NEURL1B, NPM1, RANBP17, RPL26L1, UBTD2) 5.27e-03 0.04 Surv(days, event) ~ age + feature + age:feature\n 0.66 1.41e-02 0.16 255
Lesion 6.99e-03 0.05 Surv(days, event) ~ age + feature + age:feature\n 0.66 4.19e-02 0.26 255
5q33.2 (CNOT8, FAM114A2, GEMIN5, LARP1, MFAP3, MRPL22, SAP30L) 9.46e-03 0.06 Surv(days, event) ~ age + feature + age:feature\n 0.70 3.93e-02 0.26 246
5q33.1 (ANXA6, ATOX1, CCDC69, DCTN4, G3BP1, GM2A, GPX3, NDST1, RBM22, RPS14, SLC36A1, TNIP1) 2.73e-02 0.17 Surv(days, event) ~ age + feature\n 0.72 5.66e-02 0.32 248
5q32 (ABLIM3, AFAP1L1, ARHGEF37, CD74, CSNK1A1, FBXO38, GRPEL2, HMGXB3, LARS, PDE6A, PLAC8L1, PPARGC1B, PRELID2, RBM27, SLC26A2, SLC6A7, TCERG1, TCOF1, TIGD6) 3.63e-02 0.21 Surv(days, event) ~ age + feature\n 0.73 6.95e-02 0.36 250
5q31.3 (ANKHD1, APBB3, ARHGAP26, C5orf32, DIAPH1, DND1, EIF4EBP3, FCHSD1, HARS, HARS2, HDAC3, IK, KCTD16, KIAA0141, NDFIP1, NDUFA2, NR3C1, PCDHA10, PCDHB10, PCDHB12, PCDHB13, PCDHB17, PCDHB6, PCDHB7, PCDHB8, PCDHB9, PCDHGA1, PCDHGA10, PCDHGA4, PCDHGA5, PCDHGA6, PCDHGA7, PCDHGA8, PCDHGB2, PCDHGB3, PCDHGB7, PCDHGB8P, PCDHGC3, PCDHGC4, PFDN1, RNF14, SLC35A4, TAF7, WDR55, YIPF5, ZMAT2) 6.92e-02 0.34 Surv(days, event) ~ age + feature\n 0.77 1.17e-01 0.51 249
5q31.2 (BRD8, CDC23, CTNNA1, CXXC5, DNAJC18, ECSCR, ETF1, FAM13B, FAM53C, HNRNPA0, HSPA9, KDM3B, LRRTM2, MATR3, MYOT, NME5, NPY6R, PAIP2, PKD2L2, PURA, SIL1, SLC23A1, SPATA24, UBE2D2) 7.50e-02 0.34 Surv(days, event) ~ age + feature\n 0.77 1.31e-01 0.51 248
5q31.1 (AFF4, C5orf15, C5orf20, C5orf24, C5orf56, CAMLG, CATSPER3, CDKL3, CDKN2AIPNL, CXCL14, DDX46, FBXL21, FNIP1, H2AFY, HSPA4, IRF1, KIF3A, LEAP2, LOC389332, P4HA2, PCBD2, PHF15, PPP2CA, RAD50, RAPGEF6, SAR1B, SEC24A, SEPT8, SHROOM1, SKP1, SLC22A4, SLC22A5, SMAD5, TIFAB, TXNDC15, UBE2B, VDAC1, ZCCHC10) 7.54e-02 0.34 Surv(days, event) ~ age + feature\n 0.79 1.44e-01 0.52 247
5q23.3 (CDC42SE2, FBN2, FLJ33630, HINT1, ISOC1, LYRM7, SLC12A2) 8.49e-02 0.36 Surv(days, event) ~ age + feature\n 0.80 1.71e-01 0.58 245
Deletion 4q34.3 Lesion 9.20e-02 0.37 Surv(days, event) ~ age + feature + cna__chrom... 0.57 3.94e-02 0.26 54
Amplification 5q23.2 (ALDH7A1, CEP120, CSNK1G3, CTXN3, GRAMD3, PHAX, PRRC1, SNX2, SNX24, ZNF608) 9.98e-02 0.38 Surv(days, event) ~ age + feature\n 0.82 2.32e-01 0.72 240
5q23.1 (AQPEP, COMMD10, DMXL1, DTWD2, HSD17B4, SEMA6A, SRFBP1, TNFAIP8) 1.46e-01 0.52 Surv(days, event) ~ age + feature\n 0.85 3.39e-01 0.90 236
Deletion 3p12.2 Lesion 1.82e-01 0.62 Surv(days, event) ~ age + feature + cna__chrom... 0.70 1.35e-01 0.51 242
In [56]:
from Data.Firehose import get_gistic_gene_matrix

Legend

  • Blue -> Normal
  • Orange -> Amplified
  • Red -> High Amplification
  • Purple -> Deletion
  • Black -> Homozygous Deletion
In [57]:
def draw_me(f):
    """
    Build the stacked survival figure for copy-number feature `f`.

    Three plots are drawn: the feature restricted to the columns of the
    global `df`, the full feature split by the global `stage`, and the full
    feature over all patients.  Colors and legend labels are looked up per
    copy-number level actually present in the plotted vector.

    Returns the result of `stack(...)` over the three images.
    """
    level_labels = Series({-2: 'Homozygous Deletion', -1: 'Deletion', 0: 'Normal', 1: 'Amp', 2: 'High Amp'})
    level_colors = Series({-2: 'black', -1: 'purple', 0: 'blue', 1: 'orange', 2: 'red'})

    def plot_options(vec):
        """Keyword arguments for draw_survival_curves matching the levels in `vec`."""
        present = sorted(vec.unique())
        return dict(colors=level_colors[present].tolist(),
                    labels=level_labels[present].tolist(),
                    show=True, show_legend=True, ann='p')

    restricted = cn.features.ix[f, df.columns]
    restricted.name = str(restricted.name)
    curves = draw_survival_curves(restricted, surv, **plot_options(restricted))

    everyone = cn.features.ix[f]
    everyone.name = str(everyone.name)
    split_by_stage = draw_survival_curves(everyone, surv, stage, **plot_options(everyone))
    all_surv = draw_survival_curves(everyone, surv, **plot_options(everyone))

    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in cna_met.index[:10]])
s
Out[57]:

In [60]:
survival_test = 'survival_5y'
covariates =  ['age']
cov_df = global_vars.join(clinical.clinical, how='outer')
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [61]:
df = rppa.features.ix[:,non_met]
df = df.dropna(axis=1)
counts = Series((df != 0).sum(1), name='counts')
In [62]:
rppa_met = run_feature_matrix(df, test)
rppa_met = rppa_met.join(counts).sort(columns=[('Full','LR')])
In [63]:
rppa_met.head(10)
Out[63]:
(Full, LR) (Full, LR_q) (Full, fmla) (Univariate, hazzard) (Univariate, p) (Univariate, q) counts
protiens (RAD51, Rad51-M-C) 7.90e-07 2.54e-04 Surv(days, event) ~ age + feature\n 4.60e+00 1.10e-06 1.76e-04 366
(GAB2, GAB2-R-V) 1.91e-06 3.07e-04 Surv(days, event) ~ age + feature\n 4.43e-01 2.70e-07 8.66e-05 366
(SHC1, Shc_pY317-R-NA) 9.61e-06 1.03e-03 Surv(days, event) ~ age + feature\n 2.33e-01 9.41e-06 4.67e-04 366
(IGF1R, IGF-1R-beta-R-C) 1.68e-05 1.24e-03 Surv(days, event) ~ age + feature\n 3.35e-01 3.99e-05 6.57e-04 366
pathways SIG_BCR_SIGNALING_PATHWAY 1.93e-05 1.24e-03 Surv(days, event) ~ age + feature\n 1.60e-05 1.69e-05 5.65e-04 366
REACTOME_CELL_SURFACE_INTERACTIONS_AT_THE_VASCULAR_WALL 2.55e-05 1.36e-03 Surv(days, event) ~ age + feature\n 2.73e-05 1.02e-05 4.67e-04 366
protiens (CDKN1A, p21-R-C) 3.79e-05 1.74e-03 Surv(days, event) ~ age * feature\n 5.87e+00 7.81e-05 9.64e-04 366
(SRC, Src_pY527-R-V) 4.36e-05 1.75e-03 Surv(days, event) ~ age + feature\n 5.12e-01 2.09e-06 2.24e-04 366
pathways KEGG_ENDOCYTOSIS 6.76e-05 2.36e-03 Surv(days, event) ~ age + feature\n 2.56e-05 3.76e-05 6.57e-04 366
protiens (AR, AR-R-V) 7.72e-05 2.36e-03 Surv(days, event) ~ age + feature\n 3.05e-01 1.55e-05 5.65e-04 366
In [66]:
def draw_me(f):
    """
    Build the stacked survival figure for RPPA feature `f`.

    Draws the feature restricted to the columns of the global `df`, then the
    full feature split by the global `stage`, then the full feature over all
    patients, each annotated with the log-rank result (ann='p').

    Returns the result of `stack(...)` over the three images.
    """
    plot_kwargs = dict(show=True, show_legend=True, ann='p')

    restricted = rppa.features.ix[f, df.columns]
    restricted.name = str(restricted.name)
    curves = draw_survival_curves(restricted, surv, **plot_kwargs)

    everyone = rppa.features.ix[f]
    everyone.name = str(everyone.name)
    split_by_stage = draw_survival_curves(everyone, surv, stage, **plot_kwargs)
    all_surv = draw_survival_curves(everyone, surv, **plot_kwargs)

    return stack([curves, split_by_stage, all_surv])

s = stack([draw_me(f) for f in rppa_met.index[:10]])
s
Out[66]:

Methylation

In [67]:
survival_test = 'survival_5y'
covariates =  ['age', ('methylation', 'pc1')]
cov_df = global_vars.join(clinical.clinical, how='outer')
cov_df = cov_df[covariates]
remerge = lambda s: '__'.join(s) if type(s) != str else s
cov_df = cov_df.rename(columns=remerge)
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True
In [69]:
df = meth.features.ix[:,non_met]
df = df.dropna(axis=1)
meth_met = run_feature_matrix(df, test)
meth_met = meth_met.sort(columns=[('Full','LR')])
In [70]:
meth_met.head(10)
Out[70]:
Full Univariate
LR LR_q fmla hazzard p q
REACTOME_ZINC_INFLUX_INTO_CELLS_BY_THE_SLC39_GENES_FAMILY 0.00 0.67 Surv(days, event) ~ feature + methylation__pc1... 0.00 1.14e-02 0.49
BIOCARTA_VITCB_PATHWAY 0.01 0.67 Surv(days, event) ~ feature + methylation__pc1... 0.01 9.85e-02 0.59
BIOCARTA_AHSP_PATHWAY 0.01 0.67 Surv(days, event) ~ feature + methylation__pc1... 41.07 1.60e-01 0.59
REACTOME_SEMA3A_PLEXIN_REPULSION_SIGNALING_BY_INHIBITING_INTEGRIN_ADHESION 0.02 0.67 Surv(days, event) ~ feature + methylation__pc1... 0.04 2.44e-01 0.65
BIOCARTA_SODD_PATHWAY 0.02 0.67 Surv(days, event) ~ feature + methylation__pc1... 0.01 7.91e-02 0.59
KEGG_TAURINE_AND_HYPOTAURINE_METABOLISM 0.02 0.67 Surv(days, event) ~ feature + methylation__pc1... 903.41 2.19e-03 0.22
BIOCARTA_FREE_PATHWAY 0.02 0.67 Surv(days, event) ~ feature + age\n 247.53 7.31e-04 0.14
REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT 0.03 0.68 Surv(days, event) ~ feature + methylation__pc1... 0.03 1.37e-01 0.59
BIOCARTA_LYM_PATHWAY 0.04 0.68 Surv(days, event) ~ feature + methylation__pc1... 0.03 1.25e-01 0.59
BIOCARTA_MONOCYTE_PATHWAY 0.04 0.68 Surv(days, event) ~ feature + methylation__pc1... 106.58 1.24e-01 0.59
In [71]:
n = meth_met.index[0]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_ZINC_INFLUX_INTO_CELLS_BY_THE_SLC39_GENES_FAMILY
Out[71]:
In [72]:
n = meth_met.index[1]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_VITCB_PATHWAY
Out[72]:
In [73]:
n = meth_met.index[2]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_AHSP_PATHWAY
Out[73]:
In [74]:
n = meth_met.index[3]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_SEMA3A_PLEXIN_REPULSION_SIGNALING_BY_INHIBITING_INTEGRIN_ADHESION
Out[74]:
In [75]:
n = meth_met.index[4]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_SODD_PATHWAY
Out[75]:
In [76]:
n = meth_met.index[5]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
KEGG_TAURINE_AND_HYPOTAURINE_METABOLISM
Out[76]:
In [77]:
n = meth_met.index[6]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_FREE_PATHWAY
Out[77]:
In [78]:
n = meth_met.index[7]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT
Out[78]:
In [79]:
n = meth_met.index[8]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_LYM_PATHWAY
Out[79]:
In [80]:
n = meth_met.index[8]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_LYM_PATHWAY
Out[80]:
In [81]:
n = meth_met.index[9]
print n
Image(filename='{}/Figures/PathwayPlots/{}.png'.format(meth.path, n))
BIOCARTA_MONOCYTE_PATHWAY
Out[81]:
In [82]:
def draw_me(f):
    """Draw and stack three survival-curve panels for row ``f`` of ``meth.features``.

    Panels, in the order they are drawn and stacked:
      1. ``f`` restricted to the columns of the notebook-level ``df``;
      2. ``f`` over all columns of ``meth.features``, with ``stage`` passed as
         the extra positional argument to ``draw_survival_curves``;
      3. ``f`` over all columns of ``meth.features``, without ``stage``.

    Reads the notebook globals ``meth``, ``df``, ``surv``, ``stage``,
    ``draw_survival_curves`` and ``stack``.

    Parameters
    ----------
    f : row label used to index ``meth.features``.

    Returns
    -------
    The result of ``stack([curves, split_by_stage, all_surv])``.
    """
    # Panel 1: the feature restricted to the columns kept in `df`.
    subset_feature = meth.features.ix[f, df.columns]
    subset_feature.name = str(subset_feature.name)  # coerce the label to str
    curves = draw_survival_curves(subset_feature, surv, show=True, show_legend=True, ann='p')

    # Panels 2 and 3: the feature over every column of meth.features.
    full_feature = meth.features.ix[f]
    full_feature.name = str(full_feature.name)  # coerce the label to str
    split_by_stage = draw_survival_curves(full_feature, surv, stage, show=True, show_legend=True, ann='p')
    # The pathway PNG was previously loaded here into a local that was never
    # used or returned; that dead file read (and its failure on a missing
    # plot) has been removed.
    all_surv = draw_survival_curves(full_feature, surv, show=True, show_legend=True, ann='p')
    return stack([curves, split_by_stage, all_surv])

# Call draw_me for the first ten labels of meth_met's index and stack the
# results; the bare `s` below makes the stacked object the cell's output.
s = stack([draw_me(f) for f in meth_met.index[:10]])
s
Out[82]:

Expression

In [83]:
# Configure the survival test used for the expression section: the
# 'survival_5y' entry of clinical.survival, with 'age' as the only covariate.
survival_test = 'survival_5y'
covariates = ['age']


def remerge(s):
    """Return str labels unchanged; join any other label's parts with '__'."""
    return '__'.join(s) if type(s) != str else s


# Outer-join global_vars with the clinical table, keep only the covariate
# columns, then rewrite the column labels through remerge.
cov_df = (global_vars
          .join(clinical.clinical, how='outer')[covariates]
          .rename(columns=remerge))

surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True  # predicate that is True for any input
In [84]:
# Select the `non_met` columns of the expression features and drop every
# column that contains a missing value.
df = rna.features.ix[:, non_met].dropna(axis=1)
# Hand the matrix and the configured test to run_feature_matrix, then order
# the result by its ('Full', 'LR') column.
rna_met = run_feature_matrix(df, test).sort(columns=[('Full', 'LR')])
In [85]:
# Display the first ten rows of the sorted expression results.
rna_met.head(n=10)
Out[85]:
Full Univariate
LR LR_q fmla hazzard p q
REACTOME_VIRAL_MESSENGER_RNA_SYNTHESIS 4.54e-04 0.10 Surv(days, event) ~ age * feature\n 1.79e-04 7.93e-04 0.11
BIOCARTA_ERYTH_PATHWAY 6.78e-04 0.10 Surv(days, event) ~ age + feature\n 6.67e+02 7.93e-03 0.43
REACTOME_ABORTIVE_ELONGATION_OF_HIV1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT 7.03e-04 0.10 Surv(days, event) ~ age * feature\n 3.47e-04 3.87e-04 0.08
BIOCARTA_LECTIN_PATHWAY 9.91e-04 0.10 Surv(days, event) ~ age + feature\n 1.17e+04 4.08e-04 0.08
REACTOME_NCAM1_INTERACTIONS 1.52e-03 0.10 Surv(days, event) ~ age * feature\n 3.32e+02 2.56e-02 0.58
BIOCARTA_INTRINSIC_PATHWAY 1.61e-03 0.10 Surv(days, event) ~ age * feature\n 2.61e+02 4.18e-02 0.58
REACTOME_MICRORNA_BIOGENESIS 1.75e-03 0.10 Surv(days, event) ~ age * feature\n 6.93e-04 8.32e-03 0.43
KEGG_FATTY_ACID_METABOLISM 2.41e-03 0.12 Surv(days, event) ~ age + feature\n 3.91e-03 2.76e-02 0.58
BIOCARTA_VITCB_PATHWAY 2.54e-03 0.12 Surv(days, event) ~ age * feature\n 3.26e+02 4.20e-02 0.58
REACTOME_OLFACTORY_SIGNALING_PATHWAY 2.95e-03 0.12 Surv(days, event) ~ age + feature\n 9.82e-27 2.56e-03 0.26
In [86]:
def draw_me(f):
    """Draw and stack three survival-curve panels for row ``f`` of ``rna.features``.

    NOTE(review): this redefines the ``draw_me`` declared earlier in the
    notebook (which reads ``meth.features``); after this cell runs, the name
    refers to this expression version.

    Panels, in the order they are drawn and stacked:
      1. ``f`` restricted to the columns of the notebook-level ``df``;
      2. ``f`` over all columns of ``rna.features``, with ``stage`` passed as
         the extra positional argument to ``draw_survival_curves``;
      3. ``f`` over all columns of ``rna.features``, without ``stage``.

    Reads the notebook globals ``rna``, ``df``, ``surv``, ``stage``,
    ``draw_survival_curves`` and ``stack``.

    Parameters
    ----------
    f : row label used to index ``rna.features``.

    Returns
    -------
    The result of ``stack([curves, split_by_stage, all_surv])``.
    """
    plot_options = dict(show=True, show_legend=True, ann='p')

    # Panel 1: the feature restricted to the columns kept in `df`.
    subset_feature = rna.features.ix[f, df.columns]
    subset_feature.name = str(subset_feature.name)  # coerce the label to str
    curves = draw_survival_curves(subset_feature, surv, **plot_options)

    # Panels 2 and 3: the feature over every column of rna.features.
    full_feature = rna.features.ix[f]
    full_feature.name = str(full_feature.name)  # coerce the label to str
    split_by_stage = draw_survival_curves(full_feature, surv, stage, **plot_options)
    all_surv = draw_survival_curves(full_feature, surv, **plot_options)

    panels = [curves, split_by_stage, all_surv]
    return stack(panels)

# Call draw_me for the first ten labels of rna_met's index and stack the
# results; the bare `s` below makes the stacked object the cell's output.
s = stack([draw_me(f) for f in rna_met.index[:10]])
s
Out[86]: